clear all
set more off, perm
local mydir "`1'"
global Tables 	`mydir'/TS/Tables
global Work   	`mydir'/TS/DataWork
global Raw    	`mydir'/TS/DataRaw
global WRDS  	`mydir'/WRDS
/* ********************************************************************* */
* [1] LOAD ANNUAL COMPUSTAT (all of CRSP/COMPUSTAT downloaded from WRDS online; will use annual data when quarterly data is unavailable)
gzuse using $WRDS/Compustat/crsp_a_ccm_a.dta.gz, clear   
rename *, lower

keep conm* lpermco lpermno datadate fyr linkprim liid linktype dvpsx_f at ceq cogs xint xsga csho ib itcb lt ni pstk* sale seq txdb txditc 
rename lpermco permco
rename lpermno permno

duplicates drop

label var dvpsx_f "Dividends per Share - Ex-Date - Fiscal"
/* ************************* */
* deal with crsp-compustat code for missing values
ds dvpsx_f at ceq cogs xint xsga csho ib itcb lt ni pstk* sale seq txdb txditc 
local varlist `r(varlist)'
foreach var in `r(varlist)' {
    replace `var' = . if `var' == 0.0001 | `var' == 0.0002 | `var' == 0.0003 | `var' == 0.0004 | `var' == 0.0008 
}
/* ************************* */
* compute book equity per share
generate pfd=pstkr		  					/* redemption value */	
replace  pfd=pstkl				if pfd==. 		/* liquidating value */
replace  pfd=pstk				if pfd==. 		/* carrying value */
replace  pfd=0					if pfd==.

generate steq=seq							/* Stockholders Equity */
replace  steq=ceq+pfd				if steq==.		/* Common/Ordinary Equity + Preferred  */
replace  steq=at-lt 				if steq==.		/* assets minus liabilities */

generate dtax=txditc							/* taxes and investment tax credit */
egen tmp=rowtotal(txdb itcb)						/* deferred taxes + investment tax credit */
replace  dtax=tmp 				if dtax==.
replace  dtax=0					if dtax==.
generate  bookeq_ann=(steq+dtax-pfd)
label var bookeq_ann "Book value of equity (annual)"
/* ************************* */
* deal with firms that change FYRs (so that I can tsset the data)
gen  yr=year(datadate)
egen id=group(permco permno)
gsort id yr -datadate
by id yr: gen obs=_n
keep if obs==1
************************
tsset id yr

gen inv_ann=at/L1.at-1
foreach var of varlist cogs xint xsga {
	replace `var'=0 if `var'==.
}
gen op_ann =(sale-cogs-xsga-xint)/bookeq_ann
drop xint xsga
* **********************
* compute profitability
generate opp=(sale-cogs)
generate mgn=(sale-cogs)/at
* **********************
* compute 3-year moving average
gen ma1_mgn=L1.mgn
gen ma2_mgn=ma1_mgn+L2.mgn
gen ma3_mgn=ma2_mgn+L3.mgn

replace ma1_mgn=ma1_mgn/2
replace ma2_mgn=ma2_mgn/2
replace ma3_mgn=ma3_mgn/3

label var mgn 		"(sale-cogs)/at"
label var ma1_mgn 	"1-year moving average (sale-cogs)/at"
label var ma2_mgn 	"2-year moving average (sale-cogs)/at"
label var ma3_mgn 	"3-year moving average (sale-cogs)/at"
/* ************************* */
keep conm* permco permno datadate dvpsx_f sale ib ni csho bookeq_ann mgn ma*_mgn opp op_ann inv_ann
/* ********************************************************************* */
save $Work/tmp, replace
/* ********************************************************************* */
* [2] LOAD QUARTERLY COMPUSTAT
gzuse using $WRDS/Compustat/crsp_a_ccm_q.dta.gz, clear   
rename *, lower

keep 	conm*	lpermco	lpermno	cur*	cshoq	atq	adjex*	dvpsxq	saleq	cogsq 	ibq 	niq 	datadate	datacqtr	datafqtr /// 
	rdq 	fyr 	fyrc 	fyearq	fqtr	pstk*	seq*	ceq*	atq	ltq	txdbq	txditcq	itc*		linkprim	linktype ///
	xsgaq	xintq	


label var dvpsxq "Div per Share - Exdate - Quarter"
rename lpermco permco
rename lpermno permno
keep if permco+permno<.
* 
keep if index(curcdq, "USD")>0
keep if saleq<. | dvpsxq<. | ibq <.

* get rid of duplicate obs 
duplicates drop permco permno datadate saleq dvpsxq ibq, force

duplicates tag permco permno datadate, gen(mf)
tabulate mf

*linktype: "LD"  Duplicate link to a security. Another GVKEY/IID is a better link to that CRSP 
drop if mf>0 & index(linktype,"LD")>0

*linkprim: Primary issue marker for the link.
drop if mf>0 & index(linkprim,"P")==0

* may have multiple records when firms change fiscal years
drop if mf>0 & fyr!=fyrc

* check whether i got rid of duplicates
drop mf
duplicates tag permco permno datadate, gen(mf)
tabulate mf
drop mf linkprim linktype
/* **************************************************** */
* must have only one observation per quarter -- else tsset doesn't work
* problems are caused by firms that change fiscal year
/* **************************************************** */
gen cq=qofd(datadate)
gen fq=quarterly(datafqtr,"YQ")
format cq fq %tq
order datadate fq cq

duplicates drop permco permno cq, force
*
egen id=group(permco permno)
tsset id cq 

* note: dvpsxq is "Div per Share - Exdate - Quarter"
gen div=dvpsxq*cshoq  
gen sales=saleq+L1.saleq+L2.saleq+L3.saleq   
gen earnings =ibq+L1.ibq+L2.ibq+L3.ibq  
gen earning2 =niq+L1.niq+L2.niq+L3.niq  
gen dividends=div+L1.div+L2.div+L3.div
/* ******************** */
* compute book equity per share
generate pfd=pstkrq		  					/* redemption value */	
replace  pfd=pstkq				if pfd==. 		/* carrying value */
replace  pfd=0					if pfd==.

generate steq=seqq							/* Stockholders Equity */
replace  steq=ceqq+pfd				if steq==.		/* Common/Ordinary Equity + Preferred  */
replace  steq=atq-ltq 				if steq==.		/* assets minus liabilities */

generate dtax=txditcq							/* taxes and investment tax credit */
egen tmp=rowtotal(txdbq itccy)						/* deferred taxes + investment tax credit */
replace  dtax=tmp 				if dtax==.
replace  dtax=0					if dtax==.
generate book=(steq+dtax-pfd)
generate bookeq=book+L1.book+L2.book+L3.book
label var bookeq "Book value of equity (quarterly)"
/* ************************* */
gen inv=atq/L4.atq-1	if L4.atq>0
foreach var in cogsq xintq xsgaq {
	replace `var'=0 if `var'==.
}
foreach name in "sale" "cogs" "xint" "xsga" {
	gen `name'=`name'q+L1.`name'q+L2.`name'q+L3.`name'q
}
gen op =(sale-cogs-xsga-xint)/bookeq
drop xintq xsgaq
* ****************************
* compute profitability
generate opp=(saleq+L1.saleq+L2.saleq+L3.saleq)-(cogsq+L1.cogsq+L2.cogsq+L3.cogsq)
generate mgn=opp/((atq+L1.atq+L2.atq+L3.atq)/4)

* compute 3-year moving average
gen ma1_mgn=0
forvalues x=1(1)4 {
	replace ma1_mgn=ma1_mgn+L`x'.mgn
}
gen ma2_mgn=ma1_mgn
forvalues x=5(1)8 {
	replace ma2_mgn=ma2_mgn+L`x'.mgn
}
gen ma3_mgn=ma2_mgn
forvalues x=9(1)12 {
	replace ma3_mgn=ma3_mgn+L`x'.mgn
}
replace ma1_mgn=ma1_mgn/04
replace ma2_mgn=ma2_mgn/08
replace ma3_mgn=ma3_mgn/12
label var mgn 		"(sale-cogs)/at"
label var ma1_mgn 	"1-year moving average (sale-cogs)/at"
label var ma2_mgn 	"2-year moving average (sale-cogs)/at"
label var ma3_mgn 	"3-year moving average (sale-cogs)/at"
/* ************************* */
keep conm* permco permno datadate datafqtr cq fq sales earning* dividends dvpsxq ibq bookeq mgn ma*_mgn opp inv op
/* ******************** */
* here is where I use annual data to replace missing quarterly data
merge 1:1 permco permno datadate using $Work/tmp, keepusing(permco permno datadate sale ni ib dvpsx_f csho bookeq_ann mgn ma*_mgn inv_ann op_ann)
replace sales=sale		if sales==.
replace earnings =ib 		if earnings==.
replace earning2 =ni 		if earning2==.
replace dividends=dvpsx_f*csho 	if dividends==.
replace bookeq=bookeq_ann	if bookeq==.
replace inv=inv_ann		if inv==.
replace op =op_ann		if op ==.
drop sale ib csho dvpsx_f 
/* ************************************************ */
egen id=group(permco permno)

gen date=mofd(datadate)
format date %tm

tsset id date
tsfill

foreach var of varlist permco permno {
	replace `var'=L1.`var' if missing(`var')==1
}
sort id date
foreach var of varlist con* {
	replace `var'=`var'[_n-1] if missing(`var')==1
}
foreach var of varlist sales earnings earning2 dividends bookeq mgn ma*_mgn opp inv op { 
	by id: ipolate `var' date, gen(ipolated)
	replace `var'=ipolated if `var'==.
	drop ipolated
}

* set datadate to last day of the month for date
replace datadate=dofm(date+1)-1 if datadate==.

keep conm* permco permno datadate sales earning* dividends bookeq mgn ma*_mgn opp inv op
/* ************************************************ */
save $Work/cs02, replace
/* ************************************************ */
* Restrict sample to firms on the S&P index; /srv/udpate/crsp500.dta generated by cs01.do
gen date=datadate
format date %td

merge 1:1 permco permno date using $Work/crsp500.dta, keepusing(permco permno date mcap shrout start ending)
keep if _merge==3
drop _merge 
tempfile tmp
save "`tmp'", replace

* add CRSP name
import sas using $WRDS/CRSP/stocknames.sas7bdat, clear case(lower)
joinby permco permno using "`tmp'"
keep if date>=namedt & date<=nameenddt 
/* ********************************* */
* add data for the index (total market cap and level of the index)
tempfile tmp
save "`tmp'", replace
import sas caldt totval spindx vwretd vwretx sprtrn using "/mnt/brownresearch/ECON_LaPorta/rlaporta/research/WRDS/CRSP/msp500.sas7bdat", clear
rename caldt date
keep date totval spindx
tsset date
tsfill
replace totval=L.totval if totval==.
replace spindx=L.spindx if spindx==.

merge 1:m date using "`tmp'"
drop if _merge==1
drop _merge date
/* ********************************* */
generate divisor=totval/spindx

count if earnings==. 

generate ActualS=    sales/divisor
generate ActualE= earnings/divisor
generate Actual2= earning2/divisor
generate ActualD=dividends/divisor
generate ActualB=   bookeq/divisor

generate  mcapAE=mcap if ActualE<.
generate  mcapA2=mcap if Actual2<.
generate  mcapAD=mcap if ActualD<.
generate  mcapAS=mcap if ActualS<.
generate  mcapAB=mcap if ActualB<.

bysort datadate: egen totmgn=total(mcap) if mgn<.
generate  wmg=mgn*(mcap/totmgn) 
drop totmgn

bysort datadate: egen totmgn=total(mcap) if ma1_mgn<.
generate  wma1=ma1_mgn*(mcap/totmgn) 
drop totmgn
bysort datadate: egen totmgn=total(mcap) if ma2_mgn<.
generate  wma2=ma2_mgn*(mcap/totmgn) 
drop totmgn
bysort datadate: egen totmgn=total(mcap) if ma3_mgn<.
generate  wma3=ma3_mgn*(mcap/totmgn) 
drop totmgn

collapse (sum) Actual* mcapA* mgn=wmg ma1_mgn=wma1 ma2_mgn=wma2 ma3_mgn=wma3 (lastnm) totval, by(datadate)

replace ActualS=1000*ActualS*(totval/mcapAS)  
replace ActualE=1000*ActualE*(totval/mcapAE)  
replace Actual2=1000*Actual2*(totval/mcapA2)
replace ActualD=1000*ActualD*(totval/mcapAD)
replace ActualB=1000*ActualB*(totval/mcapAB)

tsset datadate 
gen E_LTM=L90.ActualE

gen date=datadate
format date %td
gen datem=mofd(datadate)
format datem %tm
order datadate datem
/* ********************************* */
save $Work/cs02_tot, replace
/* ********************************* */
!rm $Work/tmp.dta -f
/* ********************************* */	
